package org.emailclassifier.preprocessing;

import java.util.HashMap;
import java.util.Set;

/**
 * A single document of a data set: the raw body text together with the
 * per-term frequency table derived from it.
 *
 * <p>The frequency table is recomputed through
 * {@code Freq.getWordFrequencies} every time the body is replaced.
 * Instances are mutable and perform no synchronization of their own.
 */
public class DataSetDocument {

	/** Raw text of the document; {@code null} until a body has been set. */
	private String m_body;

	/** Term -> occurrence count for the current body. Never reassigned, only refilled. */
	private final HashMap<String, Integer> m_termFreq = new HashMap<String, Integer>();

	/**
	 * Flag forwarded unchanged to {@code Freq.getWordFrequencies}.
	 * NOTE(review): presumably enables stop-word filtering - confirm against Freq.
	 */
	private final boolean m_stoplist;

	/**
	 * Creates a document with no body and an empty frequency table.
	 *
	 * @param stoplist flag passed to {@code Freq.getWordFrequencies} whenever
	 *                 a body is later set
	 */
	public DataSetDocument(boolean stoplist)
	{
		m_stoplist = stoplist;
	}

	/**
	 * Creates a document and immediately computes term frequencies for the
	 * given body.
	 *
	 * @param body     the document text
	 * @param stoplist flag passed to {@code Freq.getWordFrequencies}
	 */
	public DataSetDocument(String body, boolean stoplist)
	{
		m_stoplist = stoplist;
		// Use the private helper rather than the overridable setBody(): a
		// subclass override must not run on a partially constructed object.
		updateBody(body);
	}

	/**
	 * Replaces the document text and rebuilds the term-frequency table from it.
	 * Any previously computed frequencies are discarded.
	 *
	 * @param body the new document text
	 */
	public void setBody(String body)
	{
		updateBody(body);
	}

	/**
	 * Returns the document text.
	 *
	 * @return the current body, or {@code null} if none has been set
	 */
	public String getBody()
	{
		return m_body;
	}

	/**
	 * Returns the term-frequency table for the current body.
	 *
	 * <p>This is the internal map itself, not a copy: it is cleared and
	 * refilled by {@link #setBody(String)}, and changes made by the caller
	 * are visible to this object.
	 *
	 * @return the map from term to occurrence count
	 */
	public HashMap<String, Integer> getTermFrequencies()
	{
		return m_termFreq;
	}

	/**
	 * Returns the set of distinct terms in the current body.
	 *
	 * <p>The result is the key-set view of the internal frequency map, so it
	 * reflects later calls to {@link #setBody(String)}.
	 *
	 * @return the terms that have an entry in the frequency table
	 */
	public Set<String> getVocabulary()
	{
		return m_termFreq.keySet();
	}

	/** Stores the body and recomputes the frequency table; shared by the constructor and setBody. */
	private void updateBody(String body)
	{
		m_body = body;

		m_termFreq.clear();
		m_termFreq.putAll(Freq.getWordFrequencies(body, m_stoplist));
	}
}
